# -*- coding: utf-8 -*-  

import httplib,re
import MySQLdb
##########
def parserLink(data):
  """Extract link records from an HTML fragment.

  Each candidate in *data* is expected to look like
  ``<a ... href="URL" ...>TITLE</a><span ...>TEXT</span>``.

  Returns a list of [url, title, span_text] lists; candidates that do
  not match the detailed pattern are skipped.
  """
  # Compile both patterns once, outside the loop — the original
  # recompiled (and shadowed) the inner pattern on every iteration.
  link_re = re.compile(r"<a .*</span>")
  parts_re = re.compile(r".*href=\"(.*?)\".*>(.*)</a><span.*>(.*)</span>")
  res = list()
  for chunk in link_re.findall(data):
    m = parts_re.match(chunk)
    if m:
      res.append([m.group(1), m.group(2), m.group(3)])
  return res

def saveDB(name, data):
  for i in data:
    conn=MySQLdb.connect(host="10.241.84.65",user="root",db="stock")
    cursor = conn.cursor()
    sql = "insert into stock_news values('%s', '%s', '%s', '%s')" % (name, i[0], i[1], i[2])
    print sql
    cursor.execute(sql)
    cursor.close()
    conn.close()

# Clear the previous crawl before re-populating the table.
# conn.commit() is required: MySQLdb turns autocommit off per PEP 249,
# so without it the delete was discarded when the connection closed.
conn = MySQLdb.connect(host="10.241.84.65", user="root", db="stock")
try:
  cursor = conn.cursor()
  cursor.execute("delete from stock_news")
  cursor.close()
  conn.commit()
finally:
  conn.close()

conn = httplib.HTTPConnection("news.baidu.com")
conn.request("GET", "/n?cmd=1&class=stock&pn=1")
r1 = conn.getresponse()
data = r1.read()
data = data.decode("gb18030").encode("UTF-8")
#data = data.encode("UTF-8")
pos1 = data.find("news top ")
pos1 = data.find("<table", pos1)
pos2 = data.find("</table>", pos1)
res = parserLink(data[pos1:pos2])
saveDB("最新",res)

print ""
pos1 = data.find("<h2>大盘</h2>")
pos2 = data.find("</div>", pos1)
#print data[pos1:pos2]
res=parserLink(data[pos1:pos2])
saveDB("大盘",res)

print ""
pos1 = data.find("<h2>个股</h2>")
pos2 = data.find("</div>", pos1)
res=parserLink(data[pos1:pos2])
saveDB("个股",res)

print ""
pos1 = data.find("<h2>热点板块</h2>")
pos2 = data.find("</div>", pos1)
res=parserLink(data[pos1:pos2])
saveDB("热点板块",res)

print ""
pos1 = data.find("<h2>新股</h2>")
pos2 = data.find("</div>", pos1)
res=parserLink(data[pos1:pos2])
saveDB("新股",res)
